# China -------------------------------------------------------------------
library("maptools")
library(ggplot2)
library(maps)
library(usmap)
library(data.table)
library(ggsn) # for scale bar `scalebar`
library(ggrepel) # if need to repel labels 
library(rgdal)
library(rmarkdown)

# Read the boundary shapefile of China (one polygon per row).
china_map <- rgdal::readOGR(
  "/Users/ykwang/Dropbox/Chinese food exports/Data/chinamap/bou2_4p.shp"
)

# Pull the attribute table out of the shapefile as a data.table and give the
# name column a clearer label.
china_province <- setDT(china_map@data)
setnames(china_province, old = "NAME", new = "province")

# Re-encode the province names from GBK to UTF-8.
china_province[, province := iconv(province, from = "GBK", to = "UTF-8")]
# Zero-based polygon id (id = 0 ~ 924), used below to join the attributes
# back onto the longitude/latitude rows.
china_province[, id := seq_len(.N) - 1]
# A province can own more than one polygon because of small islands.
china_province[, table(province)]

china_province[, province := as.factor(province)]

# Flatten the polygons into one row per vertex; `id` arrives as text, so
# convert it to numeric to match `china_province$id`.
dt_china <- setDT(fortify(china_map))
dt_china[, id := as.numeric(id)]

# Keyed join on `id`: every vertex row picks up its polygon's attributes.
setkey(china_province, id)
setkey(dt_china, id)
dt_china <- china_province[dt_china]

# Build the lookup table that pairs each Chinese province name (UTF-8) with
# its English label and its search-index value.
#
# `province_EN` and `value` are matched to `province_CH` purely by position,
# and the hard-coded lists follow the code-point (C-locale) order of the
# Chinese names. Factor levels are ordered by the session's collation, which
# differs between platforms, so the names are re-sorted with the
# locale-independent "radix" method to keep the pairing stable everywhere.
province_CH <- sort(china_province[, levels(province)], method = "radix")
province_EN <- c(
  "Shanghai", "Yunnan", "Inner Mongolia", "Beijing", "Taiwan",
  "Jilin", "Sichuan", "Tianjin", "Ningxia", "Anhui",
  "Shandong", "Shanxi", "Guangdong", "Guangxi", "Xinjiang",
  "Jiangsu", "Jiangxi", "Hebei", "Henan", "Zhejiang",
  "Hainan", "Hubei", "Hunan", "Gansu", "Fujian",
  "Tibet", "Guizhou", "Liaoning", "Chongqing", "Shaanxi",
  "Qinghai", "Hong Kong", "Heilongjiang"
)

# gtrends data, in the same order as `province_EN` (NA = no value available)
value <- c(
  35, 43, 42, 50, NA, 36, 48, 55, 51, 56, 53,
  46, 42, 43, 44, 46, 57, 100, 61, 41, 39, 45,
  50, 58, 39, 53, 36, 44, 42, 44, 25, NA, 38
)

# The three vectors are aligned by position, so a length mismatch would
# silently mislabel provinces; fail loudly instead.
stopifnot(
  length(province_EN) == length(province_CH),
  length(value) == length(province_CH)
)

input_data <- data.table(province_CH, province_EN, value)

# Key both tables on the Chinese province name for the join that follows.
setkey(input_data, province_CH)
setkey(dt_china, province)

# Keep only polygons with AREA above 0.1 (drops the small islands in the
# South China Sea) and attach the English label and value to every vertex.
china_map_pop <- input_data[dt_china[AREA > 0.1]]

# Label table `label_dt`: one row per polygon id, positioned at the centre of
# the polygon's bounding box (midpoint of the longitude and latitude ranges).
label_dt <- unique(
  china_map_pop[
    ,
    .(
      x = mean(range(long)),
      y = mean(range(lat)),
      province_EN,
      province_CH
    ),
    by = id
  ]
)
setkey(label_dt, province_EN)

# Hand-tuned label positions for provinces where the bounding-box centre
# is a poor spot for the text.
label_dt["Inner Mongolia", c("x", "y") := list(110, 42)]
label_dt["Gansu", c("x", "y") := list(96.3, 40)]
label_dt["Hebei", c("x", "y") := list(115.5, 38.5)]
label_dt["Liaoning", c("x", "y") := list(123, 41.5)]

# Preview of the plotting data, restricted to rows with a known province name.
rmarkdown::paged_table(
  china_map_pop[!is.na(province_CH)]
)

# Choropleth of the search index by province, with English province labels
# and a 500 km scale bar.
ggplot(china_map_pop, aes(x = long, y = lat, group = group, fill = value)) +
  labs(fill = "Search index for Sanlu") +
  # filled provinces, then their outlines on top
  geom_polygon() +
  geom_path() +
  # provinces without a value (NA) are drawn in light grey
  scale_fill_gradientn(
    colours = rev(heat.colors(10)),
    na.value = "grey90",
    guide = guide_colourbar(barwidth = 0.8, barheight = 10)
  ) +
  # ggsn's blank theme
  blank() +
  # labels come from their own table, so the polygon aesthetics
  # (group, fill) must not be inherited
  geom_text(
    data = label_dt,
    aes(x = x, y = y, label = province_EN),
    inherit.aes = FALSE
  ) +
  # transform = TRUE tells ggsn the coordinates are decimal degrees
  scalebar(
    data = china_map_pop, dist = 500, dist_unit = "km",
    transform = TRUE, model = "WGS84",
    border.size = 0.4, st.size = 2
  )

